package com.attilax.spider;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.attilax.io.filex;

/**
 * Small command-line demo: loads a saved HTML page from disk, parses it with
 * jsoup and prints the visible text of every {@code <a>} element, one per line.
 */
public class JsoupTest {

	/** File that is parsed when no path is given on the command line. */
	private static final String DEFAULT_HTML_PATH = "c:\\知道日报_百度知道.html";

	/** Charset name handed to {@code filex.read} when loading the page. */
	private static final String HTML_CHARSET = "gbk";

	/**
	 * Reads the HTML file, parses it and writes the text of each anchor to
	 * standard output.
	 *
	 * @param args optional; when {@code args[0]} is present it is used as the
	 *             path of the HTML file, otherwise {@link #DEFAULT_HTML_PATH}
	 *             is read
	 */
	public static void main(String[] args) {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_HTML_PATH;

		String html = filex.read(path, HTML_CHARSET);
		Document doc = Jsoup.parse(html);

		// Only the anchor text is printed; the page title, the re-serialized
		// HTML and the href attributes were computed but never used, so they
		// are no longer fetched.
		Elements anchors = doc.getElementsByTag("a");
		for (Element anchor : anchors) {
			System.out.println(anchor.text());
		}
	}

}
